[<tf.Tensor 'Squeeze:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_1:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_2:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_3:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_4:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_5:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_6:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_7:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_8:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_9:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_10:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_11:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_12:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_13:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_14:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_15:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_16:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_17:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_18:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_19:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_20:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_21:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_22:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_23:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_24:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_25:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_26:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_27:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_28:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_29:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_30:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_31:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_32:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_33:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_34:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_35:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_36:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_37:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_38:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_39:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_40:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_41:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_42:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_43:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_44:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_45:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_46:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_47:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_48:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_49:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_50:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_51:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_52:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_53:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_54:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_55:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_56:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_57:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_58:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_59:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_60:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_61:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_62:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_63:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_64:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_65:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_66:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_67:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_68:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_69:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_70:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_71:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_72:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_73:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_74:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_75:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_76:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_77:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_78:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_79:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_80:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_81:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_82:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_83:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_84:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_85:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_86:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_87:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_88:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_89:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_90:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_91:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_92:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_93:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_94:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_95:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_96:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_97:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_98:0' shape=(100, 83) dtype=float32>, <tf.Tensor 'Squeeze_99:0' shape=(100, 83) dtype=float32>]
Epoch 1/20 Iteration 1/3560 Training loss: 4.4203 4.0944 sec/batch
Epoch 1/20 Iteration 2/3560 Training loss: 4.3773 3.1475 sec/batch
Epoch 1/20 Iteration 3/3560 Training loss: 4.2117 3.1793 sec/batch
Epoch 1/20 Iteration 4/3560 Training loss: 4.5463 3.3459 sec/batch
Epoch 1/20 Iteration 5/3560 Training loss: 4.5113 3.3899 sec/batch
Epoch 1/20 Iteration 6/3560 Training loss: 4.4018 3.3875 sec/batch
Epoch 1/20 Iteration 7/3560 Training loss: 4.3085 3.4705 sec/batch
Epoch 1/20 Iteration 8/3560 Training loss: 4.2202 3.4331 sec/batch
Epoch 1/20 Iteration 9/3560 Training loss: 4.1417 3.4952 sec/batch
Epoch 1/20 Iteration 10/3560 Training loss: 4.0747 3.4367 sec/batch
Epoch 1/20 Iteration 11/3560 Training loss: 4.0132 3.5550 sec/batch
Epoch 1/20 Iteration 12/3560 Training loss: 3.9613 3.4849 sec/batch
Epoch 1/20 Iteration 13/3560 Training loss: 3.9141 3.4289 sec/batch
Epoch 1/20 Iteration 14/3560 Training loss: 3.8756 3.5894 sec/batch
Epoch 1/20 Iteration 15/3560 Training loss: 3.8415 3.4762 sec/batch
Epoch 1/20 Iteration 16/3560 Training loss: 3.8100 3.4985 sec/batch
Epoch 1/20 Iteration 17/3560 Training loss: 3.7793 3.6177 sec/batch
Epoch 1/20 Iteration 18/3560 Training loss: 3.7534 3.4521 sec/batch
Epoch 1/20 Iteration 19/3560 Training loss: 3.7293 3.6464 sec/batch
Epoch 1/20 Iteration 20/3560 Training loss: 3.7057 3.5313 sec/batch
Epoch 1/20 Iteration 21/3560 Training loss: 3.6846 3.4526 sec/batch
Epoch 1/20 Iteration 22/3560 Training loss: 3.6654 3.4218 sec/batch
Epoch 1/20 Iteration 23/3560 Training loss: 3.6469 3.4166 sec/batch
Epoch 1/20 Iteration 24/3560 Training loss: 3.6299 3.4201 sec/batch
Epoch 1/20 Iteration 25/3560 Training loss: 3.6138 3.4484 sec/batch
Epoch 1/20 Iteration 26/3560 Training loss: 3.5994 3.2432 sec/batch
Epoch 1/20 Iteration 27/3560 Training loss: 3.5862 3.3115 sec/batch
Epoch 1/20 Iteration 28/3560 Training loss: 3.5727 3.4715 sec/batch
Epoch 1/20 Iteration 29/3560 Training loss: 3.5605 3.2776 sec/batch
Epoch 1/20 Iteration 30/3560 Training loss: 3.5490 3.2820 sec/batch
Epoch 1/20 Iteration 31/3560 Training loss: 3.5388 3.2877 sec/batch
Epoch 1/20 Iteration 32/3560 Training loss: 3.5281 3.2755 sec/batch
Epoch 1/20 Iteration 33/3560 Training loss: 3.5183 3.3306 sec/batch
Epoch 1/20 Iteration 34/3560 Training loss: 3.5092 3.2708 sec/batch
Epoch 1/20 Iteration 35/3560 Training loss: 3.5003 3.2621 sec/batch
Epoch 1/20 Iteration 36/3560 Training loss: 3.4920 3.3265 sec/batch
Epoch 1/20 Iteration 37/3560 Training loss: 3.4831 3.4007 sec/batch
Epoch 1/20 Iteration 38/3560 Training loss: 3.4750 3.2967 sec/batch
Epoch 1/20 Iteration 39/3560 Training loss: 3.4671 3.2557 sec/batch
Epoch 1/20 Iteration 40/3560 Training loss: 3.4596 3.2805 sec/batch
Epoch 1/20 Iteration 41/3560 Training loss: 3.4523 3.2881 sec/batch
Epoch 1/20 Iteration 42/3560 Training loss: 3.4455 3.3421 sec/batch
Epoch 1/20 Iteration 43/3560 Training loss: 3.4389 3.3302 sec/batch
Epoch 1/20 Iteration 44/3560 Training loss: 3.4326 3.2825 sec/batch
Epoch 1/20 Iteration 45/3560 Training loss: 3.4264 3.2785 sec/batch
Epoch 1/20 Iteration 46/3560 Training loss: 3.4208 3.3821 sec/batch
Epoch 1/20 Iteration 47/3560 Training loss: 3.4155 3.3385 sec/batch
Epoch 1/20 Iteration 48/3560 Training loss: 3.4104 3.2458 sec/batch
Epoch 1/20 Iteration 49/3560 Training loss: 3.4055 3.4211 sec/batch
Epoch 1/20 Iteration 50/3560 Training loss: 3.4007 3.3017 sec/batch
Epoch 1/20 Iteration 51/3560 Training loss: 3.3961 3.2250 sec/batch
Epoch 1/20 Iteration 52/3560 Training loss: 3.3914 3.3369 sec/batch
Epoch 1/20 Iteration 53/3560 Training loss: 3.3872 3.2538 sec/batch
Epoch 1/20 Iteration 54/3560 Training loss: 3.3826 3.2624 sec/batch
Epoch 1/20 Iteration 55/3560 Training loss: 3.3785 3.3563 sec/batch
Epoch 1/20 Iteration 56/3560 Training loss: 3.3741 3.3434 sec/batch
Epoch 1/20 Iteration 57/3560 Training loss: 3.3701 3.2681 sec/batch
Epoch 1/20 Iteration 58/3560 Training loss: 3.3663 3.3286 sec/batch
Epoch 1/20 Iteration 59/3560 Training loss: 3.3624 3.3281 sec/batch
Epoch 1/20 Iteration 60/3560 Training loss: 3.3589 3.2530 sec/batch
Epoch 1/20 Iteration 61/3560 Training loss: 3.3555 3.3959 sec/batch
Epoch 1/20 Iteration 62/3560 Training loss: 3.3525 3.3200 sec/batch
Epoch 1/20 Iteration 63/3560 Training loss: 3.3494 3.2664 sec/batch
Epoch 1/20 Iteration 64/3560 Training loss: 3.3458 3.3319 sec/batch
Epoch 1/20 Iteration 65/3560 Training loss: 3.3425 3.6591 sec/batch
Epoch 1/20 Iteration 66/3560 Training loss: 3.3397 3.2412 sec/batch
Epoch 1/20 Iteration 67/3560 Training loss: 3.3367 3.3525 sec/batch
Epoch 1/20 Iteration 68/3560 Training loss: 3.3333 3.2430 sec/batch
Epoch 1/20 Iteration 69/3560 Training loss: 3.3303 3.2619 sec/batch
Epoch 1/20 Iteration 70/3560 Training loss: 3.3275 3.2998 sec/batch
Epoch 1/20 Iteration 71/3560 Training loss: 3.3247 3.3164 sec/batch
Epoch 1/20 Iteration 72/3560 Training loss: 3.3222 3.2771 sec/batch
Epoch 1/20 Iteration 73/3560 Training loss: 3.3196 3.2040 sec/batch
Epoch 1/20 Iteration 74/3560 Training loss: 3.3170 3.4268 sec/batch
Epoch 1/20 Iteration 75/3560 Training loss: 3.3146 3.2925 sec/batch
Epoch 1/20 Iteration 76/3560 Training loss: 3.3123 3.3385 sec/batch
Epoch 1/20 Iteration 77/3560 Training loss: 3.3099 3.3537 sec/batch
Epoch 1/20 Iteration 78/3560 Training loss: 3.3076 3.2754 sec/batch
Epoch 1/20 Iteration 79/3560 Training loss: 3.3052 3.3905 sec/batch
Epoch 1/20 Iteration 80/3560 Training loss: 3.3027 3.3057 sec/batch
Epoch 1/20 Iteration 81/3560 Training loss: 3.3004 3.2055 sec/batch
Epoch 1/20 Iteration 82/3560 Training loss: 3.2982 3.3186 sec/batch
Epoch 1/20 Iteration 83/3560 Training loss: 3.2962 3.4378 sec/batch
Epoch 1/20 Iteration 84/3560 Training loss: 3.2941 3.2732 sec/batch
Epoch 1/20 Iteration 85/3560 Training loss: 3.2918 3.2462 sec/batch
Epoch 1/20 Iteration 86/3560 Training loss: 3.2895 3.3893 sec/batch
Epoch 1/20 Iteration 87/3560 Training loss: 3.2873 3.3168 sec/batch
Epoch 1/20 Iteration 88/3560 Training loss: 3.2851 3.3007 sec/batch
Epoch 1/20 Iteration 89/3560 Training loss: 3.2831 3.2732 sec/batch
Epoch 1/20 Iteration 90/3560 Training loss: 3.2812 3.2630 sec/batch
Epoch 1/20 Iteration 91/3560 Training loss: 3.2793 3.2838 sec/batch
Epoch 1/20 Iteration 92/3560 Training loss: 3.2772 3.3881 sec/batch
Epoch 1/20 Iteration 93/3560 Training loss: 3.2752 3.3141 sec/batch
Epoch 1/20 Iteration 94/3560 Training loss: 3.2732 3.2635 sec/batch
Epoch 1/20 Iteration 95/3560 Training loss: 3.2714 3.2736 sec/batch
Epoch 1/20 Iteration 96/3560 Training loss: 3.2696 3.2719 sec/batch
Epoch 1/20 Iteration 97/3560 Training loss: 3.2678 3.3724 sec/batch
Epoch 1/20 Iteration 98/3560 Training loss: 3.2659 3.3024 sec/batch
Epoch 1/20 Iteration 99/3560 Training loss: 3.2641 3.2506 sec/batch
Epoch 1/20 Iteration 100/3560 Training loss: 3.2622 3.3800 sec/batch
Epoch 1/20 Iteration 101/3560 Training loss: 3.2605 3.5121 sec/batch
Epoch 1/20 Iteration 102/3560 Training loss: 3.2587 3.2929 sec/batch
Epoch 1/20 Iteration 103/3560 Training loss: 3.2570 3.3131 sec/batch
Epoch 1/20 Iteration 104/3560 Training loss: 3.2552 3.2782 sec/batch
Epoch 1/20 Iteration 105/3560 Training loss: 3.2534 3.2842 sec/batch
Epoch 1/20 Iteration 106/3560 Training loss: 3.2517 3.2169 sec/batch
Epoch 1/20 Iteration 107/3560 Training loss: 3.2497 3.2946 sec/batch
Epoch 1/20 Iteration 108/3560 Training loss: 3.2479 3.2905 sec/batch
Epoch 1/20 Iteration 109/3560 Training loss: 3.2462 3.2638 sec/batch
Epoch 1/20 Iteration 110/3560 Training loss: 3.2442 3.4166 sec/batch
Epoch 1/20 Iteration 111/3560 Training loss: 3.2424 3.3242 sec/batch
Epoch 1/20 Iteration 112/3560 Training loss: 3.2407 3.3148 sec/batch
Epoch 1/20 Iteration 113/3560 Training loss: 3.2388 3.2992 sec/batch
Epoch 1/20 Iteration 114/3560 Training loss: 3.2369 3.3251 sec/batch
Epoch 1/20 Iteration 115/3560 Training loss: 3.2350 3.3106 sec/batch
Epoch 1/20 Iteration 116/3560 Training loss: 3.2330 3.2944 sec/batch
Epoch 1/20 Iteration 117/3560 Training loss: 3.2311 3.2768 sec/batch
Epoch 1/20 Iteration 118/3560 Training loss: 3.2293 3.2819 sec/batch
Epoch 1/20 Iteration 119/3560 Training loss: 3.2276 3.4295 sec/batch
Epoch 1/20 Iteration 120/3560 Training loss: 3.2265 3.2670 sec/batch
Epoch 1/20 Iteration 121/3560 Training loss: 3.2285 3.3240 sec/batch
Epoch 1/20 Iteration 122/3560 Training loss: 3.2299 3.4751 sec/batch
Epoch 1/20 Iteration 123/3560 Training loss: 3.2305 3.3124 sec/batch
Epoch 1/20 Iteration 124/3560 Training loss: 3.2303 3.2723 sec/batch
Epoch 1/20 Iteration 125/3560 Training loss: 3.2286 3.3146 sec/batch
Epoch 1/20 Iteration 126/3560 Training loss: 3.2268 3.3244 sec/batch
Epoch 1/20 Iteration 127/3560 Training loss: 3.2252 3.3654 sec/batch
Epoch 1/20 Iteration 128/3560 Training loss: 3.2236 3.3743 sec/batch
Epoch 1/20 Iteration 129/3560 Training loss: 3.2219 3.3421 sec/batch
Epoch 1/20 Iteration 130/3560 Training loss: 3.2203 3.2794 sec/batch
Epoch 1/20 Iteration 131/3560 Training loss: 3.2186 3.2852 sec/batch
Epoch 1/20 Iteration 132/3560 Training loss: 3.2168 3.2654 sec/batch
Epoch 1/20 Iteration 133/3560 Training loss: 3.2151 3.3362 sec/batch
Epoch 1/20 Iteration 134/3560 Training loss: 3.2132 3.3091 sec/batch
Epoch 1/20 Iteration 135/3560 Training loss: 3.2112 3.3010 sec/batch
Epoch 1/20 Iteration 136/3560 Training loss: 3.2092 3.2281 sec/batch
Epoch 1/20 Iteration 137/3560 Training loss: 3.2073 3.3207 sec/batch
Epoch 1/20 Iteration 138/3560 Training loss: 3.2053 3.3745 sec/batch
Epoch 1/20 Iteration 139/3560 Training loss: 3.2034 3.2559 sec/batch
Epoch 1/20 Iteration 140/3560 Training loss: 3.2014 3.3144 sec/batch
Epoch 1/20 Iteration 141/3560 Training loss: 3.1995 3.2672 sec/batch
Epoch 1/20 Iteration 142/3560 Training loss: 3.1974 3.2780 sec/batch
Epoch 1/20 Iteration 143/3560 Training loss: 3.1952 3.2748 sec/batch
Epoch 1/20 Iteration 144/3560 Training loss: 3.1931 3.2845 sec/batch
Epoch 1/20 Iteration 145/3560 Training loss: 3.1911 3.2591 sec/batch
Epoch 1/20 Iteration 146/3560 Training loss: 3.1890 3.3223 sec/batch
Epoch 1/20 Iteration 147/3560 Training loss: 3.1869 3.4200 sec/batch
Epoch 1/20 Iteration 148/3560 Training loss: 3.1849 3.2926 sec/batch
Epoch 1/20 Iteration 149/3560 Training loss: 3.1826 3.2940 sec/batch
Epoch 1/20 Iteration 150/3560 Training loss: 3.1804 3.2900 sec/batch
Epoch 1/20 Iteration 151/3560 Training loss: 3.1784 3.3903 sec/batch
Epoch 1/20 Iteration 152/3560 Training loss: 3.1764 3.3016 sec/batch
Epoch 1/20 Iteration 153/3560 Training loss: 3.1742 3.3006 sec/batch
Epoch 1/20 Iteration 154/3560 Training loss: 3.1720 3.2287 sec/batch
Epoch 1/20 Iteration 155/3560 Training loss: 3.1696 3.5967 sec/batch
Epoch 1/20 Iteration 156/3560 Training loss: 3.1673 3.4624 sec/batch
Epoch 1/20 Iteration 157/3560 Training loss: 3.1648 3.2147 sec/batch
Epoch 1/20 Iteration 158/3560 Training loss: 3.1624 3.3744 sec/batch
Epoch 1/20 Iteration 159/3560 Training loss: 3.1599 3.3026 sec/batch
Epoch 1/20 Iteration 160/3560 Training loss: 3.1575 3.2460 sec/batch
Epoch 1/20 Iteration 161/3560 Training loss: 3.1551 3.2866 sec/batch
Epoch 1/20 Iteration 162/3560 Training loss: 3.1525 3.2860 sec/batch
Epoch 1/20 Iteration 163/3560 Training loss: 3.1499 3.2505 sec/batch
Epoch 1/20 Iteration 164/3560 Training loss: 3.1475 3.2823 sec/batch
Epoch 1/20 Iteration 165/3560 Training loss: 3.1450 3.4730 sec/batch
Epoch 1/20 Iteration 166/3560 Training loss: 3.1425 3.2595 sec/batch
Epoch 1/20 Iteration 167/3560 Training loss: 3.1401 3.3301 sec/batch
Epoch 1/20 Iteration 168/3560 Training loss: 3.1376 3.2872 sec/batch
Epoch 1/20 Iteration 169/3560 Training loss: 3.1353 3.2955 sec/batch
Epoch 1/20 Iteration 170/3560 Training loss: 3.1328 3.2975 sec/batch
Epoch 1/20 Iteration 171/3560 Training loss: 3.1303 3.2900 sec/batch
Epoch 1/20 Iteration 172/3560 Training loss: 3.1280 3.2543 sec/batch
Epoch 1/20 Iteration 173/3560 Training loss: 3.1258 3.2478 sec/batch
Epoch 1/20 Iteration 174/3560 Training loss: 3.1235 3.3999 sec/batch
Epoch 1/20 Iteration 175/3560 Training loss: 3.1210 3.3017 sec/batch
Epoch 1/20 Iteration 176/3560 Training loss: 3.1185 3.2992 sec/batch
Epoch 1/20 Iteration 177/3560 Training loss: 3.1160 3.2686 sec/batch
Epoch 1/20 Iteration 178/3560 Training loss: 3.1132 3.2478 sec/batch
Epoch 2/20 Iteration 179/3560 Training loss: 2.6967 3.4045 sec/batch
Epoch 2/20 Iteration 180/3560 Training loss: 2.6524 3.2495 sec/batch
Epoch 2/20 Iteration 181/3560 Training loss: 2.6371 3.6040 sec/batch
Epoch 2/20 Iteration 182/3560 Training loss: 2.6313 3.6908 sec/batch
Epoch 2/20 Iteration 183/3560 Training loss: 2.6266 3.7741 sec/batch
Epoch 2/20 Iteration 184/3560 Training loss: 2.6231 3.4017 sec/batch
Epoch 2/20 Iteration 185/3560 Training loss: 2.6206 3.6091 sec/batch
Epoch 2/20 Iteration 186/3560 Training loss: 2.6196 3.4149 sec/batch
Epoch 2/20 Iteration 187/3560 Training loss: 2.6174 3.3985 sec/batch
Epoch 2/20 Iteration 188/3560 Training loss: 2.6141 3.2865 sec/batch
Epoch 2/20 Iteration 189/3560 Training loss: 2.6095 3.3321 sec/batch
Epoch 2/20 Iteration 190/3560 Training loss: 2.6079 3.2957 sec/batch
Epoch 2/20 Iteration 191/3560 Training loss: 2.6049 3.2967 sec/batch
Epoch 2/20 Iteration 192/3560 Training loss: 2.6043 3.4479 sec/batch
Epoch 2/20 Iteration 193/3560 Training loss: 2.6011 3.2671 sec/batch
Epoch 2/20 Iteration 194/3560 Training loss: 2.5988 3.4634 sec/batch
Epoch 2/20 Iteration 195/3560 Training loss: 2.5963 3.2785 sec/batch
Epoch 2/20 Iteration 196/3560 Training loss: 2.5960 3.2998 sec/batch
Epoch 2/20 Iteration 197/3560 Training loss: 2.5939 3.2480 sec/batch
Epoch 2/20 Iteration 198/3560 Training loss: 2.5902 3.3068 sec/batch
Epoch 2/20 Iteration 199/3560 Training loss: 2.5874 3.3064 sec/batch
Epoch 2/20 Iteration 200/3560 Training loss: 2.5868 3.2555 sec/batch
Validation loss: 2.45184 Saving checkpoint!
Epoch 2/20 Iteration 201/3560 Training loss: 2.5847 2.9235 sec/batch
Epoch 2/20 Iteration 202/3560 Training loss: 2.5818 2.9995 sec/batch
Epoch 2/20 Iteration 203/3560 Training loss: 2.5791 3.1201 sec/batch
Epoch 2/20 Iteration 204/3560 Training loss: 2.5771 3.2475 sec/batch
Epoch 2/20 Iteration 205/3560 Training loss: 2.5746 3.1626 sec/batch
Epoch 2/20 Iteration 206/3560 Training loss: 2.5719 3.1963 sec/batch
Epoch 2/20 Iteration 207/3560 Training loss: 2.5700 3.1583 sec/batch
Epoch 2/20 Iteration 208/3560 Training loss: 2.5677 3.2221 sec/batch
Epoch 2/20 Iteration 209/3560 Training loss: 2.5662 3.2864 sec/batch
Epoch 2/20 Iteration 210/3560 Training loss: 2.5639 3.1944 sec/batch
Epoch 2/20 Iteration 211/3560 Training loss: 2.5612 3.2436 sec/batch
Epoch 2/20 Iteration 212/3560 Training loss: 2.5595 3.2160 sec/batch
Epoch 2/20 Iteration 213/3560 Training loss: 2.5573 3.3034 sec/batch
Epoch 2/20 Iteration 214/3560 Training loss: 2.5557 3.2762 sec/batch
Epoch 2/20 Iteration 215/3560 Training loss: 2.5535 3.2316 sec/batch
Epoch 2/20 Iteration 216/3560 Training loss: 2.5510 3.1904 sec/batch
Epoch 2/20 Iteration 217/3560 Training loss: 2.5486 3.2702 sec/batch
Epoch 2/20 Iteration 218/3560 Training loss: 2.5463 3.3667 sec/batch
Epoch 2/20 Iteration 219/3560 Training loss: 2.5441 4.1517 sec/batch
Epoch 2/20 Iteration 220/3560 Training loss: 2.5418 4.6749 sec/batch
Epoch 2/20 Iteration 221/3560 Training loss: 2.5395 5.4965 sec/batch
Epoch 2/20 Iteration 222/3560 Training loss: 2.5373 7.8256 sec/batch
Epoch 2/20 Iteration 223/3560 Training loss: 2.5351 3.6650 sec/batch
Epoch 2/20 Iteration 224/3560 Training loss: 2.5325 3.5078 sec/batch
Epoch 2/20 Iteration 225/3560 Training loss: 2.5309 3.7683 sec/batch
Epoch 2/20 Iteration 226/3560 Training loss: 2.5289 3.7207 sec/batch
Epoch 2/20 Iteration 227/3560 Training loss: 2.5270 3.5257 sec/batch
Epoch 2/20 Iteration 228/3560 Training loss: 2.5256 3.5338 sec/batch
Epoch 2/20 Iteration 229/3560 Training loss: 2.5235 3.7173 sec/batch
Epoch 2/20 Iteration 230/3560 Training loss: 2.5219 3.6993 sec/batch
Epoch 2/20 Iteration 231/3560 Training loss: 2.5200 3.7550 sec/batch
Epoch 2/20 Iteration 232/3560 Training loss: 2.5181 4.0213 sec/batch
Epoch 2/20 Iteration 233/3560 Training loss: 2.5161 3.6660 sec/batch
Epoch 2/20 Iteration 234/3560 Training loss: 2.5146 3.8646 sec/batch
Epoch 2/20 Iteration 235/3560 Training loss: 2.5129 3.6753 sec/batch
Epoch 2/20 Iteration 236/3560 Training loss: 2.5111 3.6534 sec/batch
Epoch 2/20 Iteration 237/3560 Training loss: 2.5094 3.7716 sec/batch
Epoch 2/20 Iteration 238/3560 Training loss: 2.5079 4.1689 sec/batch
Epoch 2/20 Iteration 239/3560 Training loss: 2.5062 4.0215 sec/batch
Epoch 2/20 Iteration 240/3560 Training loss: 2.5047 3.7538 sec/batch
Epoch 2/20 Iteration 241/3560 Training loss: 2.5033 4.1904 sec/batch
Epoch 2/20 Iteration 242/3560 Training loss: 2.5016 4.5269 sec/batch
Epoch 2/20 Iteration 243/3560 Training loss: 2.4999 3.9258 sec/batch
Epoch 2/20 Iteration 244/3560 Training loss: 2.4986 4.0550 sec/batch
Epoch 2/20 Iteration 245/3560 Training loss: 2.4969 3.6947 sec/batch
Epoch 2/20 Iteration 246/3560 Training loss: 2.4950 3.6659 sec/batch
Epoch 2/20 Iteration 247/3560 Training loss: 2.4931 3.6466 sec/batch
Epoch 2/20 Iteration 248/3560 Training loss: 2.4917 3.5664 sec/batch
Epoch 2/20 Iteration 249/3560 Training loss: 2.4904 3.5674 sec/batch
Epoch 2/20 Iteration 250/3560 Training loss: 2.4890 3.9138 sec/batch
Epoch 2/20 Iteration 251/3560 Training loss: 2.4874 3.5385 sec/batch
Epoch 2/20 Iteration 252/3560 Training loss: 2.4857 3.6033 sec/batch
Epoch 2/20 Iteration 253/3560 Training loss: 2.4841 3.6798 sec/batch
Epoch 2/20 Iteration 254/3560 Training loss: 2.4832 3.6224 sec/batch
Epoch 2/20 Iteration 255/3560 Training loss: 2.4817 3.4921 sec/batch
Epoch 2/20 Iteration 256/3560 Training loss: 2.4803 3.6121 sec/batch
Epoch 2/20 Iteration 257/3560 Training loss: 2.4788 3.7026 sec/batch
Epoch 2/20 Iteration 258/3560 Training loss: 2.4773 3.6142 sec/batch
Epoch 2/20 Iteration 259/3560 Training loss: 2.4757 3.5525 sec/batch
Epoch 2/20 Iteration 260/3560 Training loss: 2.4744 3.6513 sec/batch
Epoch 2/20 Iteration 261/3560 Training loss: 2.4729 3.4375 sec/batch
Epoch 2/20 Iteration 262/3560 Training loss: 2.4712 3.4963 sec/batch
Epoch 2/20 Iteration 263/3560 Training loss: 2.4694 3.4685 sec/batch
Epoch 2/20 Iteration 264/3560 Training loss: 2.4677 3.5628 sec/batch
Epoch 2/20 Iteration 265/3560 Training loss: 2.4663 3.4849 sec/batch
Epoch 2/20 Iteration 266/3560 Training loss: 2.4648 3.4936 sec/batch
Epoch 2/20 Iteration 267/3560 Training loss: 2.4633 3.7143 sec/batch
Epoch 2/20 Iteration 268/3560 Training loss: 2.4621 3.4759 sec/batch
Epoch 2/20 Iteration 269/3560 Training loss: 2.4606 3.4658 sec/batch
Epoch 2/20 Iteration 270/3560 Training loss: 2.4593 3.5042 sec/batch
Epoch 2/20 Iteration 271/3560 Training loss: 2.4577 3.5448 sec/batch
Epoch 2/20 Iteration 272/3560 Training loss: 2.4562 3.5288 sec/batch
Epoch 2/20 Iteration 273/3560 Training loss: 2.4546 3.5178 sec/batch
Epoch 2/20 Iteration 274/3560 Training loss: 2.4530 3.5031 sec/batch
Epoch 2/20 Iteration 275/3560 Training loss: 2.4516 3.5702 sec/batch
Epoch 2/20 Iteration 276/3560 Training loss: 2.4502 3.5981 sec/batch
Epoch 2/20 Iteration 277/3560 Training loss: 2.4486 3.5880 sec/batch
Epoch 2/20 Iteration 278/3560 Training loss: 2.4471 3.5751 sec/batch
Epoch 2/20 Iteration 279/3560 Training loss: 2.4459 3.5375 sec/batch
Epoch 2/20 Iteration 280/3560 Training loss: 2.4446 3.4625 sec/batch
Epoch 2/20 Iteration 281/3560 Training loss: 2.4429 3.5610 sec/batch
Epoch 2/20 Iteration 282/3560 Training loss: 2.4415 3.4839 sec/batch
Epoch 2/20 Iteration 283/3560 Training loss: 2.4399 3.6444 sec/batch
Epoch 2/20 Iteration 284/3560 Training loss: 2.4386 3.7748 sec/batch
Epoch 2/20 Iteration 285/3560 Training loss: 2.4372 3.6727 sec/batch
Epoch 2/20 Iteration 286/3560 Training loss: 2.4362 3.4921 sec/batch
Epoch 2/20 Iteration 287/3560 Training loss: 2.4350 3.5367 sec/batch
Epoch 2/20 Iteration 288/3560 Training loss: 2.4335 3.5017 sec/batch
Epoch 2/20 Iteration 289/3560 Training loss: 2.4323 3.4742 sec/batch
Epoch 2/20 Iteration 290/3560 Training loss: 2.4312 3.5584 sec/batch
Epoch 2/20 Iteration 291/3560 Training loss: 2.4297 3.4782 sec/batch
Epoch 2/20 Iteration 292/3560 Training loss: 2.4283 3.6383 sec/batch
Epoch 2/20 Iteration 293/3560 Training loss: 2.4270 3.6520 sec/batch
Epoch 2/20 Iteration 294/3560 Training loss: 2.4254 3.5919 sec/batch
Epoch 2/20 Iteration 295/3560 Training loss: 2.4242 3.5306 sec/batch
Epoch 2/20 Iteration 296/3560 Training loss: 2.4230 3.5344 sec/batch
Epoch 2/20 Iteration 297/3560 Training loss: 2.4219 3.5983 sec/batch
Epoch 2/20 Iteration 298/3560 Training loss: 2.4207 3.7333 sec/batch
Epoch 2/20 Iteration 299/3560 Training loss: 2.4197 3.5214 sec/batch
Epoch 2/20 Iteration 300/3560 Training loss: 2.4184 3.4760 sec/batch
Epoch 2/20 Iteration 301/3560 Training loss: 2.4170 3.6052 sec/batch
Epoch 2/20 Iteration 302/3560 Training loss: 2.4160 3.5571 sec/batch
Epoch 2/20 Iteration 303/3560 Training loss: 2.4148 3.4767 sec/batch
Epoch 2/20 Iteration 304/3560 Training loss: 2.4134 3.3683 sec/batch
Epoch 2/20 Iteration 305/3560 Training loss: 2.4123 3.5128 sec/batch
Epoch 2/20 Iteration 306/3560 Training loss: 2.4112 3.6625 sec/batch
Epoch 2/20 Iteration 307/3560 Training loss: 2.4101 3.7991 sec/batch
Epoch 2/20 Iteration 308/3560 Training loss: 2.4090 3.6596 sec/batch
Epoch 2/20 Iteration 309/3560 Training loss: 2.4077 3.8375 sec/batch
Epoch 2/20 Iteration 310/3560 Training loss: 2.4063 3.6952 sec/batch
Epoch 2/20 Iteration 311/3560 Training loss: 2.4052 3.6199 sec/batch
Epoch 2/20 Iteration 312/3560 Training loss: 2.4042 3.4596 sec/batch
Epoch 2/20 Iteration 313/3560 Training loss: 2.4030 3.4610 sec/batch
Epoch 2/20 Iteration 314/3560 Training loss: 2.4019 3.7531 sec/batch
Epoch 2/20 Iteration 315/3560 Training loss: 2.4007 3.6191 sec/batch
Epoch 2/20 Iteration 316/3560 Training loss: 2.3995 3.7373 sec/batch
Epoch 2/20 Iteration 317/3560 Training loss: 2.3986 3.5963 sec/batch
Epoch 2/20 Iteration 318/3560 Training loss: 2.3974 3.4830 sec/batch
Epoch 2/20 Iteration 319/3560 Training loss: 2.3964 3.5146 sec/batch
Epoch 2/20 Iteration 320/3560 Training loss: 2.3952 3.7255 sec/batch
Epoch 2/20 Iteration 321/3560 Training loss: 2.3940 3.6138 sec/batch
Epoch 2/20 Iteration 322/3560 Training loss: 2.3928 3.6208 sec/batch
Epoch 2/20 Iteration 323/3560 Training loss: 2.3916 3.6851 sec/batch
Epoch 2/20 Iteration 324/3560 Training loss: 2.3907 3.4978 sec/batch
Epoch 2/20 Iteration 325/3560 Training loss: 2.3896 3.5152 sec/batch
Epoch 2/20 Iteration 326/3560 Training loss: 2.3887 3.6583 sec/batch
Epoch 2/20 Iteration 327/3560 Training loss: 2.3875 3.7458 sec/batch
Epoch 2/20 Iteration 328/3560 Training loss: 2.3863 3.7272 sec/batch
Epoch 2/20 Iteration 329/3560 Training loss: 2.3853 3.8111 sec/batch
Epoch 2/20 Iteration 330/3560 Training loss: 2.3846 3.6066 sec/batch
Epoch 2/20 Iteration 331/3560 Training loss: 2.3836 3.4464 sec/batch
Epoch 2/20 Iteration 332/3560 Training loss: 2.3826 3.4515 sec/batch
Epoch 2/20 Iteration 333/3560 Training loss: 2.3814 3.4929 sec/batch
Epoch 2/20 Iteration 334/3560 Training loss: 2.3804 3.5232 sec/batch
Epoch 2/20 Iteration 335/3560 Training loss: 2.3793 3.5260 sec/batch
Epoch 2/20 Iteration 336/3560 Training loss: 2.3782 3.4712 sec/batch
Epoch 2/20 Iteration 337/3560 Training loss: 2.3770 3.4710 sec/batch
Epoch 2/20 Iteration 338/3560 Training loss: 2.3761 3.4999 sec/batch
Epoch 2/20 Iteration 339/3560 Training loss: 2.3751 3.4674 sec/batch
Epoch 2/20 Iteration 340/3560 Training loss: 2.3740 3.5951 sec/batch
Epoch 2/20 Iteration 341/3560 Training loss: 2.3729 3.4632 sec/batch
Epoch 2/20 Iteration 342/3560 Training loss: 2.3719 3.4504 sec/batch
Epoch 2/20 Iteration 343/3560 Training loss: 2.3709 3.5946 sec/batch
Epoch 2/20 Iteration 344/3560 Training loss: 2.3698 3.5578 sec/batch
Epoch 2/20 Iteration 345/3560 Training loss: 2.3688 3.4090 sec/batch
Epoch 2/20 Iteration 346/3560 Training loss: 2.3679 3.5004 sec/batch
Epoch 2/20 Iteration 347/3560 Training loss: 2.3669 3.4928 sec/batch
Epoch 2/20 Iteration 348/3560 Training loss: 2.3658 3.4372 sec/batch
Epoch 2/20 Iteration 349/3560 Training loss: 2.3647 3.5193 sec/batch
Epoch 2/20 Iteration 350/3560 Training loss: 2.3636 3.4566 sec/batch
Epoch 2/20 Iteration 351/3560 Training loss: 2.3627 3.8176 sec/batch
Epoch 2/20 Iteration 352/3560 Training loss: 2.3618 3.4958 sec/batch
Epoch 2/20 Iteration 353/3560 Training loss: 2.3609 3.3985 sec/batch
Epoch 2/20 Iteration 354/3560 Training loss: 2.3598 3.4438 sec/batch
Epoch 2/20 Iteration 355/3560 Training loss: 2.3587 3.4242 sec/batch
Epoch 2/20 Iteration 356/3560 Training loss: 2.3577 3.3533 sec/batch
Epoch 3/20 Iteration 357/3560 Training loss: 2.2437 3.5711 sec/batch
Epoch 3/20 Iteration 358/3560 Training loss: 2.1931 3.5141 sec/batch
Epoch 3/20 Iteration 359/3560 Training loss: 2.1772 3.4167 sec/batch
Epoch 3/20 Iteration 360/3560 Training loss: 2.1698 3.6147 sec/batch
Epoch 3/20 Iteration 361/3560 Training loss: 2.1682 3.4680 sec/batch
Epoch 3/20 Iteration 362/3560 Training loss: 2.1630 3.5372 sec/batch
Epoch 3/20 Iteration 363/3560 Training loss: 2.1634 3.3590 sec/batch
Epoch 3/20 Iteration 364/3560 Training loss: 2.1630 3.4985 sec/batch
Epoch 3/20 Iteration 365/3560 Training loss: 2.1654 3.4886 sec/batch
Epoch 3/20 Iteration 366/3560 Training loss: 2.1643 3.4991 sec/batch
Epoch 3/20 Iteration 367/3560 Training loss: 2.1616 3.4421 sec/batch
Epoch 3/20 Iteration 368/3560 Training loss: 2.1595 3.4510 sec/batch
Epoch 3/20 Iteration 369/3560 Training loss: 2.1586 3.5853 sec/batch
Epoch 3/20 Iteration 370/3560 Training loss: 2.1602 3.3820 sec/batch
Epoch 3/20 Iteration 371/3560 Training loss: 2.1596 3.4125 sec/batch
Epoch 3/20 Iteration 372/3560 Training loss: 2.1580 3.4557 sec/batch
Epoch 3/20 Iteration 373/3560 Training loss: 2.1569 3.4660 sec/batch
Epoch 3/20 Iteration 374/3560 Training loss: 2.1589 3.3930 sec/batch
Epoch 3/20 Iteration 375/3560 Training loss: 2.1586 3.5050 sec/batch
Epoch 3/20 Iteration 376/3560 Training loss: 2.1573 3.4798 sec/batch
Epoch 3/20 Iteration 377/3560 Training loss: 2.1561 3.5703 sec/batch
Epoch 3/20 Iteration 378/3560 Training loss: 2.1570 3.5901 sec/batch
Epoch 3/20 Iteration 379/3560 Training loss: 2.1561 3.4656 sec/batch
Epoch 3/20 Iteration 380/3560 Training loss: 2.1545 3.4694 sec/batch
Epoch 3/20 Iteration 381/3560 Training loss: 2.1536 3.5110 sec/batch
Epoch 3/20 Iteration 382/3560 Training loss: 2.1523 3.3953 sec/batch
Epoch 3/20 Iteration 383/3560 Training loss: 2.1510 3.4979 sec/batch
Epoch 3/20 Iteration 384/3560 Training loss: 2.1505 3.4760 sec/batch
Epoch 3/20 Iteration 385/3560 Training loss: 2.1507 3.5566 sec/batch
Epoch 3/20 Iteration 386/3560 Training loss: 2.1500 3.5025 sec/batch
Epoch 3/20 Iteration 387/3560 Training loss: 2.1494 3.4164 sec/batch
Epoch 3/20 Iteration 388/3560 Training loss: 2.1483 3.4605 sec/batch
Epoch 3/20 Iteration 389/3560 Training loss: 2.1470 3.5625 sec/batch
Epoch 3/20 Iteration 390/3560 Training loss: 2.1472 3.5063 sec/batch
Epoch 3/20 Iteration 391/3560 Training loss: 2.1461 3.3897 sec/batch
Epoch 3/20 Iteration 392/3560 Training loss: 2.1453 3.5159 sec/batch
Epoch 3/20 Iteration 393/3560 Training loss: 2.1445 3.4524 sec/batch
Epoch 3/20 Iteration 394/3560 Training loss: 2.1427 3.4332 sec/batch
Epoch 3/20 Iteration 395/3560 Training loss: 2.1412 3.5364 sec/batch
Epoch 3/20 Iteration 396/3560 Training loss: 2.1396 3.5350 sec/batch
Epoch 3/20 Iteration 397/3560 Training loss: 2.1387 3.4916 sec/batch
Epoch 3/20 Iteration 398/3560 Training loss: 2.1378 3.4178 sec/batch
Epoch 3/20 Iteration 399/3560 Training loss: 2.1366 3.5025 sec/batch
Epoch 3/20 Iteration 400/3560 Training loss: 2.1353 3.6073 sec/batch
Validation loss: 2.00476 Saving checkpoint!
Epoch 3/20 Iteration 401/3560 Training loss: 2.1352 3.2739 sec/batch
Epoch 3/20 Iteration 402/3560 Training loss: 2.1332 3.2289 sec/batch
Epoch 3/20 Iteration 403/3560 Training loss: 2.1326 3.3329 sec/batch
Epoch 3/20 Iteration 404/3560 Training loss: 2.1313 3.4337 sec/batch
Epoch 3/20 Iteration 405/3560 Training loss: 2.1307 3.2948 sec/batch
Epoch 3/20 Iteration 406/3560 Training loss: 2.1306 3.3000 sec/batch
Epoch 3/20 Iteration 407/3560 Training loss: 2.1293 3.3923 sec/batch
Epoch 3/20 Iteration 408/3560 Training loss: 2.1292 3.6400 sec/batch
Epoch 3/20 Iteration 409/3560 Training loss: 2.1281 3.6734 sec/batch
Epoch 3/20 Iteration 410/3560 Training loss: 2.1273 3.6499 sec/batch
Epoch 3/20 Iteration 411/3560 Training loss: 2.1263 3.5331 sec/batch
Epoch 3/20 Iteration 412/3560 Training loss: 2.1257 3.3907 sec/batch
Epoch 3/20 Iteration 413/3560 Training loss: 2.1251 3.3519 sec/batch
Epoch 3/20 Iteration 414/3560 Training loss: 2.1243 3.4130 sec/batch
Epoch 3/20 Iteration 415/3560 Training loss: 2.1233 3.4009 sec/batch
Epoch 3/20 Iteration 416/3560 Training loss: 2.1231 3.5033 sec/batch
Epoch 3/20 Iteration 417/3560 Training loss: 2.1224 3.4227 sec/batch
Epoch 3/20 Iteration 418/3560 Training loss: 2.1223 3.4004 sec/batch
Epoch 3/20 Iteration 419/3560 Training loss: 2.1219 3.4352 sec/batch
Epoch 3/20 Iteration 420/3560 Training loss: 2.1214 3.5275 sec/batch
Epoch 3/20 Iteration 421/3560 Training loss: 2.1206 3.3541 sec/batch
Epoch 3/20 Iteration 422/3560 Training loss: 2.1203 3.4383 sec/batch
Epoch 3/20 Iteration 423/3560 Training loss: 2.1196 3.3642 sec/batch
Epoch 3/20 Iteration 424/3560 Training loss: 2.1186 3.4478 sec/batch
Epoch 3/20 Iteration 425/3560 Training loss: 2.1177 3.4796 sec/batch
Epoch 3/20 Iteration 426/3560 Training loss: 2.1170 3.5107 sec/batch
Epoch 3/20 Iteration 427/3560 Training loss: 2.1169 3.3408 sec/batch
Epoch 3/20 Iteration 428/3560 Training loss: 2.1162 3.5679 sec/batch
Epoch 3/20 Iteration 429/3560 Training loss: 2.1157 3.4614 sec/batch
Epoch 3/20 Iteration 430/3560 Training loss: 2.1147 3.4281 sec/batch
Epoch 3/20 Iteration 431/3560 Training loss: 2.1140 3.4657 sec/batch
Epoch 3/20 Iteration 432/3560 Training loss: 2.1137 3.4233 sec/batch
Epoch 3/20 Iteration 433/3560 Training loss: 2.1129 3.4102 sec/batch
Epoch 3/20 Iteration 434/3560 Training loss: 2.1124 3.4704 sec/batch
Epoch 3/20 Iteration 435/3560 Training loss: 2.1114 3.3869 sec/batch
Epoch 3/20 Iteration 436/3560 Training loss: 2.1104 3.4189 sec/batch
Epoch 3/20 Iteration 437/3560 Training loss: 2.1093 3.4893 sec/batch
Epoch 3/20 Iteration 438/3560 Training loss: 2.1087 3.4852 sec/batch
Epoch 3/20 Iteration 439/3560 Training loss: 2.1076 3.3463 sec/batch
Epoch 3/20 Iteration 440/3560 Training loss: 2.1068 3.4419 sec/batch
Epoch 3/20 Iteration 441/3560 Training loss: 2.1057 3.3745 sec/batch
Epoch 3/20 Iteration 442/3560 Training loss: 2.1047 3.4556 sec/batch
Epoch 3/20 Iteration 443/3560 Training loss: 2.1039 3.5676 sec/batch
Epoch 3/20 Iteration 444/3560 Training loss: 2.1029 3.6193 sec/batch
Epoch 3/20 Iteration 445/3560 Training loss: 2.1019 3.5128 sec/batch
Epoch 3/20 Iteration 446/3560 Training loss: 2.1013 3.4297 sec/batch
Epoch 3/20 Iteration 447/3560 Training loss: 2.1004 3.4581 sec/batch
Epoch 3/20 Iteration 448/3560 Training loss: 2.0997 3.3935 sec/batch
Epoch 3/20 Iteration 449/3560 Training loss: 2.0985 3.4951 sec/batch
Epoch 3/20 Iteration 450/3560 Training loss: 2.0975 3.4030 sec/batch
Epoch 3/20 Iteration 451/3560 Training loss: 2.0965 3.4369 sec/batch
Epoch 3/20 Iteration 452/3560 Training loss: 2.0956 3.4439 sec/batch
Epoch 3/20 Iteration 453/3560 Training loss: 2.0948 3.4653 sec/batch
Epoch 3/20 Iteration 454/3560 Training loss: 2.0939 3.4878 sec/batch
Epoch 3/20 Iteration 455/3560 Training loss: 2.0929 3.5240 sec/batch
Epoch 3/20 Iteration 456/3560 Training loss: 2.0918 3.4555 sec/batch
Epoch 3/20 Iteration 457/3560 Training loss: 2.0911 3.3506 sec/batch
Epoch 3/20 Iteration 458/3560 Training loss: 2.0905 3.5004 sec/batch
Epoch 3/20 Iteration 459/3560 Training loss: 2.0896 3.4579 sec/batch
Epoch 3/20 Iteration 460/3560 Training loss: 2.0887 3.4276 sec/batch
Epoch 3/20 Iteration 461/3560 Training loss: 2.0878 3.4139 sec/batch
Epoch 3/20 Iteration 462/3560 Training loss: 2.0872 3.4099 sec/batch
Epoch 3/20 Iteration 463/3560 Training loss: 2.0865 3.5711 sec/batch
Epoch 3/20 Iteration 464/3560 Training loss: 2.0860 3.5083 sec/batch
Epoch 3/20 Iteration 465/3560 Training loss: 2.0854 3.3925 sec/batch
Epoch 3/20 Iteration 466/3560 Training loss: 2.0848 3.4608 sec/batch
Epoch 3/20 Iteration 467/3560 Training loss: 2.0841 3.4524 sec/batch
Epoch 3/20 Iteration 468/3560 Training loss: 2.0835 3.4335 sec/batch
Epoch 3/20 Iteration 469/3560 Training loss: 2.0828 3.4976 sec/batch
Epoch 3/20 Iteration 470/3560 Training loss: 2.0820 3.4470 sec/batch
Epoch 3/20 Iteration 471/3560 Training loss: 2.0812 3.5006 sec/batch
Epoch 3/20 Iteration 472/3560 Training loss: 2.0803 3.5864 sec/batch
Epoch 3/20 Iteration 473/3560 Training loss: 2.0796 3.4303 sec/batch
Epoch 3/20 Iteration 474/3560 Training loss: 2.0789 3.4474 sec/batch
Epoch 3/20 Iteration 475/3560 Training loss: 2.0783 3.4229 sec/batch
Epoch 3/20 Iteration 476/3560 Training loss: 2.0776 3.6071 sec/batch
Epoch 3/20 Iteration 477/3560 Training loss: 2.0771 3.3828 sec/batch
Epoch 3/20 Iteration 478/3560 Training loss: 2.0763 3.4899 sec/batch
Epoch 3/20 Iteration 479/3560 Training loss: 2.0755 3.4805 sec/batch
Epoch 3/20 Iteration 480/3560 Training loss: 2.0750 3.4524 sec/batch
Epoch 3/20 Iteration 481/3560 Training loss: 2.0743 3.5210 sec/batch
Epoch 3/20 Iteration 482/3560 Training loss: 2.0734 3.4591 sec/batch
Epoch 3/20 Iteration 483/3560 Training loss: 2.0729 3.3120 sec/batch
Epoch 3/20 Iteration 484/3560 Training loss: 2.0724 3.4329 sec/batch
Epoch 3/20 Iteration 485/3560 Training loss: 2.0717 3.4480 sec/batch
Epoch 3/20 Iteration 486/3560 Training loss: 2.0711 3.4566 sec/batch
Epoch 3/20 Iteration 487/3560 Training loss: 2.0703 3.3776 sec/batch
Epoch 3/20 Iteration 488/3560 Training loss: 2.0694 3.3883 sec/batch
Epoch 3/20 Iteration 489/3560 Training loss: 2.0689 3.4468 sec/batch
Epoch 3/20 Iteration 490/3560 Training loss: 2.0683 3.5437 sec/batch
Epoch 3/20 Iteration 491/3560 Training loss: 2.0676 3.3667 sec/batch
Epoch 3/20 Iteration 492/3560 Training loss: 2.0671 3.3821 sec/batch
Epoch 3/20 Iteration 493/3560 Training loss: 2.0666 3.3966 sec/batch
Epoch 3/20 Iteration 494/3560 Training loss: 2.0660 3.4381 sec/batch
Epoch 3/20 Iteration 495/3560 Training loss: 2.0657 3.4313 sec/batch
Epoch 3/20 Iteration 496/3560 Training loss: 2.0650 3.4395 sec/batch
Epoch 3/20 Iteration 497/3560 Training loss: 2.0645 3.3918 sec/batch
Epoch 3/20 Iteration 498/3560 Training loss: 2.0640 3.5077 sec/batch
Epoch 3/20 Iteration 499/3560 Training loss: 2.0635 3.4484 sec/batch
Epoch 3/20 Iteration 500/3560 Training loss: 2.0631 3.3679 sec/batch
Epoch 3/20 Iteration 501/3560 Training loss: 2.0625 3.3585 sec/batch
Epoch 3/20 Iteration 502/3560 Training loss: 2.0621 3.3873 sec/batch
Epoch 3/20 Iteration 503/3560 Training loss: 2.0617 3.3605 sec/batch
Epoch 3/20 Iteration 504/3560 Training loss: 2.0614 3.4441 sec/batch
Epoch 3/20 Iteration 505/3560 Training loss: 2.0609 3.4628 sec/batch
Epoch 3/20 Iteration 506/3560 Training loss: 2.0603 3.3874 sec/batch
Epoch 3/20 Iteration 507/3560 Training loss: 2.0597 3.7432 sec/batch
Epoch 3/20 Iteration 508/3560 Training loss: 2.0595 3.4050 sec/batch
Epoch 3/20 Iteration 509/3560 Training loss: 2.0590 3.4123 sec/batch
Epoch 3/20 Iteration 510/3560 Training loss: 2.0586 3.4366 sec/batch
Epoch 3/20 Iteration 511/3560 Training loss: 2.0581 3.3932 sec/batch
Epoch 3/20 Iteration 512/3560 Training loss: 2.0575 3.4530 sec/batch
Epoch 3/20 Iteration 513/3560 Training loss: 2.0570 3.5071 sec/batch
Epoch 3/20 Iteration 514/3560 Training loss: 2.0564 3.6016 sec/batch
Epoch 3/20 Iteration 515/3560 Training loss: 2.0557 3.3994 sec/batch
Epoch 3/20 Iteration 516/3560 Training loss: 2.0553 3.5066 sec/batch
Epoch 3/20 Iteration 517/3560 Training loss: 2.0549 3.4193 sec/batch
Epoch 3/20 Iteration 518/3560 Training loss: 2.0543 3.3902 sec/batch
Epoch 3/20 Iteration 519/3560 Training loss: 2.0538 3.4986 sec/batch
Epoch 3/20 Iteration 520/3560 Training loss: 2.0533 3.4189 sec/batch
Epoch 3/20 Iteration 521/3560 Training loss: 2.0527 3.4251 sec/batch
Epoch 3/20 Iteration 522/3560 Training loss: 2.0521 3.4720 sec/batch
Epoch 3/20 Iteration 523/3560 Training loss: 2.0516 3.4143 sec/batch
Epoch 3/20 Iteration 524/3560 Training loss: 2.0514 3.4661 sec/batch
Epoch 3/20 Iteration 525/3560 Training loss: 2.0508 3.8410 sec/batch
Epoch 3/20 Iteration 526/3560 Training loss: 2.0503 4.0102 sec/batch
Epoch 3/20 Iteration 527/3560 Training loss: 2.0496 4.1638 sec/batch
Epoch 3/20 Iteration 528/3560 Training loss: 2.0490 3.6846 sec/batch
Epoch 3/20 Iteration 529/3560 Training loss: 2.0487 3.7276 sec/batch
Epoch 3/20 Iteration 530/3560 Training loss: 2.0481 3.6248 sec/batch
Epoch 3/20 Iteration 531/3560 Training loss: 2.0477 3.5779 sec/batch
Epoch 3/20 Iteration 532/3560 Training loss: 2.0471 3.5898 sec/batch
Epoch 3/20 Iteration 533/3560 Training loss: 2.0464 3.6987 sec/batch
Epoch 3/20 Iteration 534/3560 Training loss: 2.0459 3.5303 sec/batch
Epoch 4/20 Iteration 535/3560 Training loss: 2.0155 3.5663 sec/batch
Epoch 4/20 Iteration 536/3560 Training loss: 1.9707 3.5746 sec/batch
Epoch 4/20 Iteration 537/3560 Training loss: 1.9576 3.6434 sec/batch
Epoch 4/20 Iteration 538/3560 Training loss: 1.9490 3.7762 sec/batch
Epoch 4/20 Iteration 539/3560 Training loss: 1.9479 3.5693 sec/batch
Epoch 4/20 Iteration 540/3560 Training loss: 1.9382 3.5781 sec/batch
Epoch 4/20 Iteration 541/3560 Training loss: 1.9388 3.6959 sec/batch
Epoch 4/20 Iteration 542/3560 Training loss: 1.9383 3.6016 sec/batch
Epoch 4/20 Iteration 543/3560 Training loss: 1.9417 3.5915 sec/batch
Epoch 4/20 Iteration 544/3560 Training loss: 1.9407 3.5681 sec/batch
Epoch 4/20 Iteration 545/3560 Training loss: 1.9367 3.5760 sec/batch
Epoch 4/20 Iteration 546/3560 Training loss: 1.9347 3.5134 sec/batch
Epoch 4/20 Iteration 547/3560 Training loss: 1.9345 3.6770 sec/batch
Epoch 4/20 Iteration 548/3560 Training loss: 1.9369 3.5577 sec/batch
Epoch 4/20 Iteration 549/3560 Training loss: 1.9358 3.6452 sec/batch
Epoch 4/20 Iteration 550/3560 Training loss: 1.9341 3.5558 sec/batch
Epoch 4/20 Iteration 551/3560 Training loss: 1.9331 3.5311 sec/batch
Epoch 4/20 Iteration 552/3560 Training loss: 1.9348 3.5512 sec/batch
Epoch 4/20 Iteration 553/3560 Training loss: 1.9343 3.5115 sec/batch
Epoch 4/20 Iteration 554/3560 Training loss: 1.9342 3.5989 sec/batch
Epoch 4/20 Iteration 555/3560 Training loss: 1.9339 3.4407 sec/batch
Epoch 4/20 Iteration 556/3560 Training loss: 1.9347 3.5235 sec/batch
Epoch 4/20 Iteration 557/3560 Training loss: 1.9336 3.5759 sec/batch
Epoch 4/20 Iteration 558/3560 Training loss: 1.9329 3.7992 sec/batch
Epoch 4/20 Iteration 559/3560 Training loss: 1.9325 3.5326 sec/batch
Epoch 4/20 Iteration 560/3560 Training loss: 1.9310 3.4765 sec/batch
Epoch 4/20 Iteration 561/3560 Training loss: 1.9299 3.9906 sec/batch
Epoch 4/20 Iteration 562/3560 Training loss: 1.9298 3.6825 sec/batch
Epoch 4/20 Iteration 563/3560 Training loss: 1.9306 3.5756 sec/batch
Epoch 4/20 Iteration 564/3560 Training loss: 1.9306 3.6341 sec/batch
Epoch 4/20 Iteration 565/3560 Training loss: 1.9301 3.4700 sec/batch
Epoch 4/20 Iteration 566/3560 Training loss: 1.9287 3.7177 sec/batch
Epoch 4/20 Iteration 567/3560 Training loss: 1.9286 3.5582 sec/batch
Epoch 4/20 Iteration 568/3560 Training loss: 1.9289 3.6231 sec/batch
Epoch 4/20 Iteration 569/3560 Training loss: 1.9282 3.5321 sec/batch
Epoch 4/20 Iteration 570/3560 Training loss: 1.9273 3.9020 sec/batch
Epoch 4/20 Iteration 571/3560 Training loss: 1.9267 3.5353 sec/batch
Epoch 4/20 Iteration 572/3560 Training loss: 1.9253 3.5637 sec/batch
Epoch 4/20 Iteration 573/3560 Training loss: 1.9238 3.5921 sec/batch
Epoch 4/20 Iteration 574/3560 Training loss: 1.9226 3.6701 sec/batch
Epoch 4/20 Iteration 575/3560 Training loss: 1.9218 3.6913 sec/batch
Epoch 4/20 Iteration 576/3560 Training loss: 1.9215 3.5500 sec/batch
Epoch 4/20 Iteration 577/3560 Training loss: 1.9204 3.5900 sec/batch
Epoch 4/20 Iteration 578/3560 Training loss: 1.9192 3.5883 sec/batch
Epoch 4/20 Iteration 579/3560 Training loss: 1.9190 3.5486 sec/batch
Epoch 4/20 Iteration 580/3560 Training loss: 1.9175 3.7628 sec/batch
Epoch 4/20 Iteration 581/3560 Training loss: 1.9170 3.5706 sec/batch
Epoch 4/20 Iteration 582/3560 Training loss: 1.9157 3.5512 sec/batch
Epoch 4/20 Iteration 583/3560 Training loss: 1.9150 3.7670 sec/batch
Epoch 4/20 Iteration 584/3560 Training loss: 1.9152 3.5573 sec/batch
Epoch 4/20 Iteration 585/3560 Training loss: 1.9142 3.5730 sec/batch
Epoch 4/20 Iteration 586/3560 Training loss: 1.9146 3.5810 sec/batch
Epoch 4/20 Iteration 587/3560 Training loss: 1.9137 3.5223 sec/batch
Epoch 4/20 Iteration 588/3560 Training loss: 1.9132 3.6315 sec/batch
Epoch 4/20 Iteration 589/3560 Training loss: 1.9124 3.5497 sec/batch
Epoch 4/20 Iteration 590/3560 Training loss: 1.9119 3.5588 sec/batch
Epoch 4/20 Iteration 591/3560 Training loss: 1.9117 3.7510 sec/batch
Epoch 4/20 Iteration 592/3560 Training loss: 1.9111 3.6515 sec/batch
Epoch 4/20 Iteration 593/3560 Training loss: 1.9102 3.4837 sec/batch
Epoch 4/20 Iteration 594/3560 Training loss: 1.9105 3.6351 sec/batch
Epoch 4/20 Iteration 595/3560 Training loss: 1.9100 3.5601 sec/batch
Epoch 4/20 Iteration 596/3560 Training loss: 1.9102 3.5346 sec/batch
Epoch 4/20 Iteration 597/3560 Training loss: 1.9103 3.5422 sec/batch
Epoch 4/20 Iteration 598/3560 Training loss: 1.9100 3.5920 sec/batch
Epoch 4/20 Iteration 599/3560 Training loss: 1.9095 3.5940 sec/batch
Epoch 4/20 Iteration 600/3560 Training loss: 1.9096 3.7192 sec/batch
Validation loss: 1.77728 Saving checkpoint!
Epoch 4/20 Iteration 601/3560 Training loss: 1.9098 3.2232 sec/batch
Epoch 4/20 Iteration 602/3560 Training loss: 1.9091 3.3873 sec/batch
Epoch 4/20 Iteration 603/3560 Training loss: 1.9084 3.6261 sec/batch
Epoch 4/20 Iteration 604/3560 Training loss: 1.9078 3.4410 sec/batch
Epoch 4/20 Iteration 605/3560 Training loss: 1.9080 3.4504 sec/batch
Epoch 4/20 Iteration 606/3560 Training loss: 1.9076 3.5969 sec/batch
Epoch 4/20 Iteration 607/3560 Training loss: 1.9075 3.5180 sec/batch
Epoch 4/20 Iteration 608/3560 Training loss: 1.9069 3.5160 sec/batch
Epoch 4/20 Iteration 609/3560 Training loss: 1.9062 3.5251 sec/batch
Epoch 4/20 Iteration 610/3560 Training loss: 1.9061 3.4714 sec/batch
Epoch 4/20 Iteration 611/3560 Training loss: 1.9057 3.6069 sec/batch
Epoch 4/20 Iteration 612/3560 Training loss: 1.9054 3.5291 sec/batch
Epoch 4/20 Iteration 613/3560 Training loss: 1.9044 3.5462 sec/batch
Epoch 4/20 Iteration 614/3560 Training loss: 1.9040 3.7194 sec/batch
Epoch 4/20 Iteration 615/3560 Training loss: 1.9031 3.4590 sec/batch
Epoch 4/20 Iteration 616/3560 Training loss: 1.9029 3.5354 sec/batch
Epoch 4/20 Iteration 617/3560 Training loss: 1.9020 3.6022 sec/batch
Epoch 4/20 Iteration 618/3560 Training loss: 1.9015 3.5115 sec/batch
Epoch 4/20 Iteration 619/3560 Training loss: 1.9007 3.5566 sec/batch
Epoch 4/20 Iteration 620/3560 Training loss: 1.9000 3.5780 sec/batch
Epoch 4/20 Iteration 621/3560 Training loss: 1.8993 3.4886 sec/batch
Epoch 4/20 Iteration 622/3560 Training loss: 1.8987 3.5585 sec/batch
Epoch 4/20 Iteration 623/3560 Training loss: 1.8979 3.6548 sec/batch
Epoch 4/20 Iteration 624/3560 Training loss: 1.8977 3.5601 sec/batch
Epoch 4/20 Iteration 625/3560 Training loss: 1.8971 3.5361 sec/batch
Epoch 4/20 Iteration 626/3560 Training loss: 1.8965 3.5743 sec/batch
Epoch 4/20 Iteration 627/3560 Training loss: 1.8956 3.5134 sec/batch
Epoch 4/20 Iteration 628/3560 Training loss: 1.8949 3.5093 sec/batch
Epoch 4/20 Iteration 629/3560 Training loss: 1.8941 3.5626 sec/batch
Epoch 4/20 Iteration 630/3560 Training loss: 1.8937 3.5798 sec/batch
Epoch 4/20 Iteration 631/3560 Training loss: 1.8931 3.6149 sec/batch
Epoch 4/20 Iteration 632/3560 Training loss: 1.8925 3.5110 sec/batch
Epoch 4/20 Iteration 633/3560 Training loss: 1.8916 3.5241 sec/batch
Epoch 4/20 Iteration 634/3560 Training loss: 1.8908 3.7574 sec/batch
Epoch 4/20 Iteration 635/3560 Training loss: 1.8903 3.5520 sec/batch
Epoch 4/20 Iteration 636/3560 Training loss: 1.8898 3.6130 sec/batch
Epoch 4/20 Iteration 637/3560 Training loss: 1.8892 3.5543 sec/batch
Epoch 4/20 Iteration 638/3560 Training loss: 1.8887 3.5672 sec/batch
Epoch 4/20 Iteration 639/3560 Training loss: 1.8880 3.6402 sec/batch
Epoch 4/20 Iteration 640/3560 Training loss: 1.8875 3.6008 sec/batch
Epoch 4/20 Iteration 641/3560 Training loss: 1.8871 3.5615 sec/batch
Epoch 4/20 Iteration 642/3560 Training loss: 1.8866 3.5238 sec/batch
Epoch 4/20 Iteration 643/3560 Training loss: 1.8863 3.6009 sec/batch
Epoch 4/20 Iteration 644/3560 Training loss: 1.8860 3.5622 sec/batch
Epoch 4/20 Iteration 645/3560 Training loss: 1.8855 3.5353 sec/batch
Epoch 4/20 Iteration 646/3560 Training loss: 1.8849 3.5415 sec/batch
Epoch 4/20 Iteration 647/3560 Training loss: 1.8843 4.1589 sec/batch
Epoch 4/20 Iteration 648/3560 Training loss: 1.8838 4.2808 sec/batch
Epoch 4/20 Iteration 649/3560 Training loss: 1.8831 3.5366 sec/batch
Epoch 4/20 Iteration 650/3560 Training loss: 1.8824 3.9432 sec/batch
Epoch 4/20 Iteration 651/3560 Training loss: 1.8820 4.0091 sec/batch
Epoch 4/20 Iteration 652/3560 Training loss: 1.8816 3.7540 sec/batch
Epoch 4/20 Iteration 653/3560 Training loss: 1.8811 3.8093 sec/batch
Epoch 4/20 Iteration 654/3560 Training loss: 1.8806 3.9266 sec/batch
Epoch 4/20 Iteration 655/3560 Training loss: 1.8802 3.7548 sec/batch
Epoch 4/20 Iteration 656/3560 Training loss: 1.8795 3.8668 sec/batch
Epoch 4/20 Iteration 657/3560 Training loss: 1.8789 3.7995 sec/batch
Epoch 4/20 Iteration 658/3560 Training loss: 1.8787 3.8348 sec/batch
Epoch 4/20 Iteration 659/3560 Training loss: 1.8782 3.7959 sec/batch
Epoch 4/20 Iteration 660/3560 Training loss: 1.8774 3.7619 sec/batch
Epoch 4/20 Iteration 661/3560 Training loss: 1.8771 3.7825 sec/batch
Epoch 4/20 Iteration 662/3560 Training loss: 1.8768 3.8489 sec/batch
Epoch 4/20 Iteration 663/3560 Training loss: 1.8764 3.8422 sec/batch
Epoch 4/20 Iteration 664/3560 Training loss: 1.8759 3.8589 sec/batch
Epoch 4/20 Iteration 665/3560 Training loss: 1.8752 3.7812 sec/batch
Epoch 4/20 Iteration 666/3560 Training loss: 1.8746 3.8125 sec/batch
Epoch 4/20 Iteration 667/3560 Training loss: 1.8742 3.7680 sec/batch
Epoch 4/20 Iteration 668/3560 Training loss: 1.8739 4.0675 sec/batch
Epoch 4/20 Iteration 669/3560 Training loss: 1.8735 3.8689 sec/batch
Epoch 4/20 Iteration 670/3560 Training loss: 1.8732 3.7972 sec/batch
Epoch 4/20 Iteration 671/3560 Training loss: 1.8729 3.9571 sec/batch
Epoch 4/20 Iteration 672/3560 Training loss: 1.8726 3.9359 sec/batch
Epoch 4/20 Iteration 673/3560 Training loss: 1.8723 3.7257 sec/batch
Epoch 4/20 Iteration 674/3560 Training loss: 1.8719 3.7962 sec/batch
Epoch 4/20 Iteration 675/3560 Training loss: 1.8717 3.7956 sec/batch
Epoch 4/20 Iteration 676/3560 Training loss: 1.8712 3.8008 sec/batch
Epoch 4/20 Iteration 677/3560 Training loss: 1.8708 3.8246 sec/batch
Epoch 4/20 Iteration 678/3560 Training loss: 1.8705 3.8842 sec/batch
Epoch 4/20 Iteration 679/3560 Training loss: 1.8700 3.9010 sec/batch
Epoch 4/20 Iteration 680/3560 Training loss: 1.8698 3.8250 sec/batch
Epoch 4/20 Iteration 681/3560 Training loss: 1.8695 3.8324 sec/batch
Epoch 4/20 Iteration 682/3560 Training loss: 1.8693 3.8120 sec/batch
Epoch 4/20 Iteration 683/3560 Training loss: 1.8690 3.8420 sec/batch
Epoch 4/20 Iteration 684/3560 Training loss: 1.8685 3.8356 sec/batch
Epoch 4/20 Iteration 685/3560 Training loss: 1.8679 3.8375 sec/batch
Epoch 4/20 Iteration 686/3560 Training loss: 1.8677 3.7908 sec/batch
Epoch 4/20 Iteration 687/3560 Training loss: 1.8674 3.9133 sec/batch
Epoch 4/20 Iteration 688/3560 Training loss: 1.8671 3.5274 sec/batch
Epoch 4/20 Iteration 689/3560 Training loss: 1.8668 3.5762 sec/batch
Epoch 4/20 Iteration 690/3560 Training loss: 1.8663 3.6319 sec/batch
Epoch 4/20 Iteration 691/3560 Training loss: 1.8661 3.9293 sec/batch
Epoch 4/20 Iteration 692/3560 Training loss: 1.8656 4.2168 sec/batch
Epoch 4/20 Iteration 693/3560 Training loss: 1.8651 3.4597 sec/batch
Epoch 4/20 Iteration 694/3560 Training loss: 1.8649 3.8203 sec/batch
Epoch 4/20 Iteration 695/3560 Training loss: 1.8647 4.1683 sec/batch
Epoch 4/20 Iteration 696/3560 Training loss: 1.8644 3.7559 sec/batch
Epoch 4/20 Iteration 697/3560 Training loss: 1.8641 3.6106 sec/batch
Epoch 4/20 Iteration 698/3560 Training loss: 1.8637 3.4835 sec/batch
Epoch 4/20 Iteration 699/3560 Training loss: 1.8633 3.5997 sec/batch
Epoch 4/20 Iteration 700/3560 Training loss: 1.8629 3.9833 sec/batch
Epoch 4/20 Iteration 701/3560 Training loss: 1.8627 3.4670 sec/batch
Epoch 4/20 Iteration 702/3560 Training loss: 1.8627 3.4414 sec/batch
Epoch 4/20 Iteration 703/3560 Training loss: 1.8622 3.5998 sec/batch
Epoch 4/20 Iteration 704/3560 Training loss: 1.8619 3.6977 sec/batch
Epoch 4/20 Iteration 705/3560 Training loss: 1.8614 3.7461 sec/batch
Epoch 4/20 Iteration 706/3560 Training loss: 1.8609 3.7063 sec/batch
Epoch 4/20 Iteration 707/3560 Training loss: 1.8607 3.5778 sec/batch
Epoch 4/20 Iteration 708/3560 Training loss: 1.8604 3.3909 sec/batch
Epoch 4/20 Iteration 709/3560 Training loss: 1.8601 3.6354 sec/batch
Epoch 4/20 Iteration 710/3560 Training loss: 1.8597 3.6231 sec/batch
Epoch 4/20 Iteration 711/3560 Training loss: 1.8592 3.5256 sec/batch
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-65-780c3a10820d> in <module>()
33 model.initial_state: new_state}
34 batch_loss, new_state, _ = sess.run([model.cost, model.final_state, model.optimizer],
---> 35 feed_dict=feed)
36 loss += batch_loss
37 end = time.time()
/Users/swapna/anaconda/envs/python3_5/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
765 try:
766 result = self._run(None, fetches, feed_dict, options_ptr,
--> 767 run_metadata_ptr)
768 if run_metadata:
769 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/Users/swapna/anaconda/envs/python3_5/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
963 if final_fetches or final_targets:
964 results = self._do_run(handle, final_targets, final_fetches,
--> 965 feed_dict_string, options, run_metadata)
966 else:
967 results = []
/Users/swapna/anaconda/envs/python3_5/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1013 if handle is None:
1014 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1015 target_list, options, run_metadata)
1016 else:
1017 return self._do_call(_prun_fn, self._session, handle, feed_dict,
/Users/swapna/anaconda/envs/python3_5/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1020 def _do_call(self, fn, *args):
1021 try:
-> 1022 return fn(*args)
1023 except errors.OpError as e:
1024 message = compat.as_text(e.message)
/Users/swapna/anaconda/envs/python3_5/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
1002 return tf_session.TF_Run(session, options,
1003 feed_dict, fetch_list, target_list,
-> 1004 status, run_metadata)
1005
1006 def _prun_fn(session, handle, feed_dict, fetch_list):
KeyboardInterrupt: